import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

# First run requires downloading the stopword list and the tokenizer models.
nltk.download('stopwords')
nltk.download('punkt')
# NLTK >= 3.8.2 moved the punkt tokenizer tables into a separate 'punkt_tab'
# resource; without it, word_tokenize raises LookupError on current installs.
nltk.download('punkt_tab')

# Sample text to demonstrate stopword removal.
text = "This is a sample sentence demonstrating how to remove stopwords using NLTK in Python."

# Tokenize; lowercase first so stopword matching is case-insensitive
# (the NLTK stopword list is all lowercase).
tokens = word_tokenize(text.lower())

# Load the English stopword list; a set gives O(1) membership tests.
stop_words = set(stopwords.words('english'))

# Keep tokens that are not stopwords; isalnum() also drops punctuation tokens
# produced by word_tokenize (e.g. the trailing '.').
filtered_tokens = [word for word in tokens if word not in stop_words and word.isalnum()]

# Print results (output labels intentionally kept as in the original script).
print("原始文本:", text)
print("分词结果:", tokens)
print("过滤停用词后:", filtered_tokens)

# Optional: show the first 20 stopwords (set order is arbitrary).
print("\n英文停用词示例:", list(stop_words)[:20])